set.seed(3)
suppressPackageStartupMessages({
library(SingleCellExperiment)
library(rgl)
library(clusterExperiment)
library(RColorBrewer)
library(ggplot2)
library(dplyr)
})
colby <- function(values, g=4){
if(is(values, "character")){
cols <- as.numeric(as.factor(values))
return(cols)
}
if(is(values, "factor")){
ncl <- nlevels(values)
if(ncl > 9){
colpal <- c(RColorBrewer::brewer.pal(9, 'Set1'), wesanderson::wes_palette("Darjeeling1", n=ncl-9, type="continuous"))
} else {
colpal <- RColorBrewer::brewer.pal(9, 'Set1')
}
cols <- colpal[as.numeric(values)]
return(cols)
}
if(is(values, "numeric")){
pal <- wesanderson::wes_palette("Zissou1", n=g, type="continuous")
gg <- Hmisc::cut2(values, g=g)
if(nlevels(gg) < g){
nl <- nlevels(gg)
if(nl == 2) pal <- pal[c(1,g)]
}
cols <- pal[gg]
return(cols)
}
}
cols <- brewer.pal(9,'Set1')
colsBig <- clusterExperiment::massivePalette
plotDRTimePoint <- function(dr, timePoint, cols, legendCex=1, legendPos="topright", ...){
plot(dr, col=cols[timePoint], pch=16, cex=1/2, xlab="UMAP1", ylab="UMAP2", bty='l', ...)
legend(legendPos, legend=levels(timePoint), col=cols[1:nlevels(timePoint)], pch=16, bty='n', cex=legendCex)
}
plotDRExpression <- function(dr, counts){
df = data.frame(dim1=dr[,1], dim2=dr[,2], y=counts)
df %>% ggplot(aes(x=dim1, y=dim2, col=y)) +
geom_point(size=2/3) + scale_color_viridis_c(alpha=.6) + theme_classic()
}
dataDir <- "../data"
cl <- readRDS(paste0(dataDir,"/cl_OE.rds"))
load(paste0(dataDir,"/regenK5_scone_none,fq,ruv_k=1,no_bio,batch_rsec_adjP_mergecutoff_0.01_20190609_085359.Rda"))
subset <- readRDS(paste0(dataDir,"/subset_index.rds"))
batch <- droplevels(colData(cl2)$batch[subset])
counts <- assays(cl2)$counts[,subset]
colnames(counts) <- colData(cl2)$samples[subset]
timePoint <- as.character(colData(cl2)$batch[subset])
timePoint <- factor(unlist(lapply(strsplit(timePoint, split="_"), "[[", 1)),
levels=c("24HPI", "48HPI", "96HPI", "7DPI", "14DPI", "Mix"))
rm(cl2) ; gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 7416384 396.1 14481390 773.4 NA 9988747 533.5
## Vcells 337225457 2572.9 926475190 7068.5 102400 770606619 5879.3
Doublets were identified by Boying Gong using DoubletFinder on each individual sample.
# Remove Doublets
doublets <- read.csv(paste0(dataDir,"/doublet_prediction_sample.csv"), stringsAsFactors = FALSE)
hist(doublets$doubletFinderScore)
doubID <- which(doublets$doubletFinderPred)
doubletCells <- doublets$cell[doubID] #271 doublets (~1%)
counts_noDoubs <- counts[,-doubID]
timePoint_noDoubs <- timePoint[-doubID]
cl_noDoubs <- cl[-doubID]
library(uwot)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:S4Vectors':
##
## expand
library(irlba)
### top 100 PCs
set.seed(15)
fVar <- rowVars(counts_noDoubs)
fMean <- rowMeans(counts_noDoubs)
highVarFeatures <- order(fVar, decreasing=TRUE)[1:500]
pcIk <- irlba(log1p(counts_noDoubs[highVarFeatures,]), nv=100)
# UMAP on principal directions
umIk50 <- uwot::umap(pcIk$v, n_components=3)
rownames(umIk50) <- colnames(counts_noDoubs)
plot(umIk50, col=alpha(brewer.pal(6,'Set1')[timePoint_noDoubs],.4), pch=16, cex=1/2,
xlab="UMAP1", ylab="UMAP2", bty='l')
legend("bottomright",levels(timePoint),pch=16,col=brewer.pal(6,'Set1'), bty='n')
plot(umIk50, col=alpha(brewer.pal(9,'Set1')[factor(cl_noDoubs)],.4), pch=16, cex=1/2,
xlab="UMAP1", ylab="UMAP2", bty='l')
oldCelltype <- c("SUS", "IN3", "rHBC", "IN2", "GBC", "Neuron", "IN1", "HBC", "MV")
oldCelltype <- factor(oldCelltype, levels=oldCelltype)
legend("bottomright",levels(oldCelltype),pch=16,col=brewer.pal(9,'Set1'), bty='n')
plot3d(umIk50, aspect = 'iso', col=brewer.pal(9,'Set1')[factor(cl_noDoubs)], alpha=.3)
plot3d(umIk50, aspect = 'iso', col=brewer.pal(6,'Set1')[timePoint_noDoubs], alpha=.3)
# 24h and 48h cells at mature neuron stage are true neurons => some neurons slipped through
plot3d(umIk50, aspect = 'iso', col=colby(counts_noDoubs["Omp",]), alpha=.3)
plot3d(umIk50, aspect = 'iso', col=colby(counts_noDoubs["Scg2",]), alpha=.3)
plot3d(umIk50, aspect = 'iso', col=colby(counts_noDoubs["Neurod1",]), alpha=.3)
# SUS
plot3d(umIk50, aspect = 'iso', col=colby(counts_noDoubs["Cyp2g1",]), alpha=.3)
plot3d(umIk50, aspect = 'iso', col=colby(counts_noDoubs["Cyp1a2",]), alpha=.3)
plot3d(umIk50, aspect = 'iso', col=colby(counts_noDoubs["Vmo1",]), alpha=.3)
plot3d(umIk50, aspect = 'iso', col=colby(counts_noDoubs["Muc2",]), alpha=.3)
# HBC
plot3d(umIk50, aspect = 'iso', col=colby(counts_noDoubs["Krt5",]), alpha=.3)
plot3d(umIk50, aspect = 'iso', col=colby(counts_noDoubs["Krt14",]), alpha=.3)
plot3d(umIk50, aspect = 'iso', col=colby(counts_noDoubs["Krt15",]), alpha=.3)
plot3d(umIk50, aspect = 'iso', col=colby(counts_noDoubs["Trp63",]), alpha=.3)
plot3d(umIk50, aspect = 'iso', col=colby(counts_noDoubs["Gpr87",]), alpha=.3)
# GBC
plot3d(umIk50, aspect = 'iso', col=colby(counts_noDoubs["Sprr1a",]), alpha=.3)
plot3d(umIk50, aspect = 'iso', col=colby(counts_noDoubs["Ascl1",]), alpha=.3)
plot3d(umIk50, aspect = 'iso', col=colby(counts_noDoubs["Prim1",]), alpha=.3)
plot3d(umIk50, aspect = 'iso', col=colby(counts_noDoubs["Tk1",]), alpha=.3)
plot3d(umIk50, aspect = 'iso', col=colby(counts_noDoubs["Plk1",]), alpha=.3)
# MV
plot3d(umIk50, aspect = 'iso', col=colby(counts_noDoubs["Ascl3",]), alpha=.3)
plot3d(umIk50, aspect = 'iso', col=colby(counts_noDoubs["Trpm5",]), alpha=.3)
plot3d(umIk50, aspect = 'iso',
col=cols[timePoint_noDoubs],
alpha=.6)
bridgeRegionID <- (umIk50[,1] < -3.3 & umIk50[,1] > -4.5) & (umIk50[,2] < 0)
plot3d(umIk50, aspect = 'iso',
col=colby(factor(bridgeRegionID), g=2), alpha=.6)
bridgeAllCells <- which(bridgeRegionID)
bridgeID <- bridgeAllCells[timePoint_noDoubs[bridgeAllCells] %in% levels(timePoint_noDoubs)[1:3]]
# plot cells to be removed
bridgeLogical <- vector(length=nrow(umIk50))
bridgeLogical[bridgeID] <- TRUE
plot3d(umIk50, aspect = 'iso',
col=colby(factor(bridgeLogical), g=2), alpha=.4)
bridgeCellNames <- rownames(umIk50)[bridgeID]
rm(bridgeLogical, bridgeAllCells)
plot3d(umIk50, aspect = 'iso',
col=cols[timePoint_noDoubs],
alpha=.6)
mnCellID <- which(umIk50[,1] < -6.5)
mnCellID <- mnCellID[timePoint_noDoubs[mnCellID] %in% levels(timePoint_noDoubs)[1:2]]
mnLogical <- vector(length=nrow(umIk50))
mnLogical[mnCellID] <- TRUE
plot3d(umIk50, aspect = 'iso',
col=colby(factor(mnLogical), g=2), alpha=.4)
rm(mnLogical)
rmAsync <- c(bridgeID, mnCellID)
umIk50_async <- umIk50[-rmAsync,]
counts_noDoubs_async <- counts_noDoubs[,-rmAsync]
timePoint_noDoubs_async <- timePoint_noDoubs[-rmAsync]
cl_noDoubs_async <- cl_noDoubs[-rmAsync]
rm(counts, counts_noDoubs) ; gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 7610549 406.5 14481390 773.4 NA 14481390 773.4
## Vcells 333060472 2541.1 1111850228 8482.8 102400 1056817977 8062.9
dataDavid <- "../data/objectsFromDavid"
davidGenes <- read.table(paste0(dataDavid, "/genes_used.txt"), stringsAsFactors = FALSE)[,1]
davidPC <- readRDS(paste0(dataDavid, "/pcs.rds"))
davidAnnotation <- read.csv(paste0(dataDavid, "/Datta_annotation_HBC_lineage_newPCs.csv"))
clDavid <- davidAnnotation$leiden_0_6_caleb20PCs
mergedCl <- vector(length=nrow(davidAnnotation))
mergedCl[clDavid %in% c(13,9,6,1)] <- "HBC*"
mergedCl[clDavid %in% c(3,0,5,15)] <- "HBC"
mergedCl[clDavid %in% c(11)] <- "GBC"
mergedCl[clDavid %in% c(11)] <- "GBC"
mergedCl[clDavid %in% c(10,12,4)] <- "iOSN"
mergedCl[clDavid %in% c(8)] <- "mOSN"
mergedCl[clDavid %in% c(2)] <- "Sus"
mergedCl[clDavid %in% c(14)] <- "MV"
mergedCl[clDavid %in% c(7)] <- "RE HBC"
mergedCl[clDavid %in% c(16)] <- "RE"
mergedCl <- factor(mergedCl)
umDavid1 <- uwot::umap(davidPC, n_neighbors=20, min_dist=0.5)
plotDRTimePoint(dr=umDavid1, timePoint=factor(clDavid), cols=colsBig, legendCex=2/3)
plotDRTimePoint(dr=umDavid1, timePoint=factor(mergedCl), cols=colsBig, legendCex=4/5, legendPos="bottomright")
plotDRTimePoint(dr=umDavid1, timePoint=timePoint_noDoubs_async, cols=brewer.pal(9, 'Set1'), legendPos="bottomright")
plotDRTimePoint(dr=umDavid1, timePoint=factor(cl_noDoubs_async), cols=brewer.pal(9, 'Set1'), legendCex=4/5, legendPos="bottomleft", main="clusterExperiment labels")
normDavid <- sweep(counts_noDoubs_async, 2, colSums(counts_noDoubs_async), "/") * median(colSums(counts_noDoubs_async))
## genes that have been removed
davidGenes[!davidGenes %in% rownames(normDavid)]
## [1] "4930523C07Rik" "4631405K08Rik" "2900093K20Rik" "1700013H16Rik"
## [5] "Sprr2j-ps" "1110017D15Rik" "1500011B03Rik" "RP24-492L15.6"
## [9] "8430408G22Rik" "2200002D01Rik" "Hbb-bs" "RP23-4H17.3"
## [13] "1500009L16Rik" "1810011O10Rik" "1700012B09Rik" "2810417H13Rik"
## [17] "5730403I07Rik" "6330403K07Rik" "1700016K19Rik" "Krtap3-2"
## [21] "Krtap2-4" "Krtap17-1" "Olfr465-ps1" "1700001L19Rik"
## [25] "1700086L19Rik" "2300005B03Rik" "2610037D02Rik" "4732456N10Rik"
## [29] "1700097N02Rik" "H2-K1" "H2-Ab1" "H2-Aa"
## [33] "H2-Eb1" "H2-D1" "H2-T23" "2900055J20Rik"
## [37] "2610016A17Rik" "4430402I18Rik" "mt-Nd1" "mt-Nd2"
## [41] "mt-Co1" "mt-Co2" "mt-Atp6" "mt-Co3"
## [45] "mt-Nd3" "mt-Nd4" "mt-Nd5" "mt-Cytb"
## [49] "CreER\""
davidGenes2 <- intersect(davidGenes, rownames(normDavid))
## genes that have been removed
hvGenes <- order(rowVars(normDavid), decreasing=TRUE)[1:length(davidGenes2)]
pc100David_hvg <- irlba(log1p(normDavid[hvGenes,]), nv=100)
umDavid_hvg <- uwot::umap(pc100David_hvg$v, n_neighbors=20, min_dist=0.5)
plotDRTimePoint(dr=umDavid_hvg, timePoint=factor(mergedCl), cols=colsBig, legendCex=4/5, legendPos="topleft", main="Datta cluster labels")
plotDRTimePoint(dr=umDavid_hvg, timePoint=factor(timePoint_noDoubs_async), cols=brewer.pal(9, 'Set1'), legendCex=4/5, legendPos="topleft", main="Time point")
plotDRTimePoint(dr=umDavid_hvg, timePoint=factor(cl_noDoubs_async), cols=brewer.pal(9, 'Set1'), legendCex=4/5, legendPos="topleft", main="clusterExperiment labels")
Check expression of respiratory genes and other markers
# Respiratory cells
plotDRExpression(umDavid_hvg, log1p(counts_noDoubs_async["Adh7",])) + ggtitle("Adh7")
plotDRExpression(umDavid_hvg, log1p(counts_noDoubs_async["Reg3g",])) + ggtitle("Reg3g")
# HBC
plotDRExpression(umDavid_hvg, log1p(counts_noDoubs_async["Krt5",])) + ggtitle("Krt5")
plotDRExpression(umDavid_hvg, log1p(counts_noDoubs_async["Krt14",])) + ggtitle("Krt14")
plotDRExpression(umDavid_hvg, log1p(counts_noDoubs_async["Trp63",])) + ggtitle("Trp63")
# HBC*
plotDRExpression(umDavid_hvg, log1p(counts_noDoubs_async["Sprr1a",])) + ggtitle("Sprr1a")
plotDRExpression(umDavid_hvg, log1p(counts_noDoubs_async["Krtdap",])) + ggtitle("Krtdap")
# GBC
plotDRExpression(umDavid_hvg, log1p(counts_noDoubs_async["Ascl1",])) + ggtitle("Ascl1")
counts_noResp_noMV <- counts_noDoubs_async[,!mergedCl %in% c("RE", "RE HBC", "MV")]
# counts
saveRDS(counts_noResp_noMV, file="../data/finalTrajectory/counts_noResp_noMV.rds")
# timePoints
timePoint_noResp_noMV <- timePoint_noDoubs_async[!mergedCl %in% c("RE", "RE HBC", "MV")]
saveRDS(timePoint_noResp_noMV, file="../data/finalTrajectory/timePoint_noResp_noMV.rds")
# Datta labels
mergedCl_noResp_noMV <- mergedCl[!mergedCl %in% c("RE", "RE HBC", "MV")]
saveRDS(mergedCl_noResp_noMV, file="../data/finalTrajectory/dattaCl_noResp_noMV.rds")
# clusterExperiment labels
cl_noResp_noMV <- timePoint_noDoubs_async[!mergedCl %in% c("RE", "RE HBC", "MV")]
saveRDS(cl_noResp_noMV, file="../data/finalTrajectory/rsecCl.rds")
sessionInfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.5
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] irlba_2.3.3 uwot_0.1.5
## [3] Matrix_1.2-18 dplyr_0.8.4
## [5] ggplot2_3.3.2 RColorBrewer_1.1-2
## [7] clusterExperiment_2.6.1 bigmemory_4.5.36
## [9] rgl_0.100.30 SingleCellExperiment_1.8.0
## [11] SummarizedExperiment_1.16.1 DelayedArray_0.12.2
## [13] BiocParallel_1.20.1 matrixStats_0.56.0
## [15] Biobase_2.46.0 GenomicRanges_1.38.0
## [17] GenomeInfoDb_1.22.0 IRanges_2.20.2
## [19] S4Vectors_0.24.3 BiocGenerics_0.32.0
##
## loaded via a namespace (and not attached):
## [1] backports_1.1.5 uuid_0.1-2 Hmisc_4.3-1
## [4] NMF_0.21.0 plyr_1.8.5 lazyeval_0.2.2
## [7] splines_3.6.2 crosstalk_1.0.0 rncl_0.8.3
## [10] gridBase_0.4-7 digest_0.6.23 foreach_1.4.7
## [13] htmltools_0.4.0 wesanderson_0.3.6 checkmate_2.0.0
## [16] magrittr_1.5 memoise_1.1.0 cluster_2.1.0
## [19] doParallel_1.0.15 limma_3.42.1 annotate_1.64.0
## [22] RcppParallel_4.4.4 prettyunits_1.1.1 jpeg_0.1-8.1
## [25] colorspace_1.4-1 blob_1.2.1 xfun_0.12
## [28] crayon_1.3.4 RCurl_1.98-1.1 jsonlite_1.6.1
## [31] bigmemory.sri_0.1.3 genefilter_1.68.0 phylobase_0.8.6
## [34] survival_3.1-8 iterators_1.0.12 ape_5.3
## [37] glue_1.3.1 registry_0.5-1 gtable_0.3.0
## [40] zlibbioc_1.32.0 XVector_0.26.0 webshot_0.5.2
## [43] kernlab_0.9-29 Rhdf5lib_1.8.0 HDF5Array_1.14.1
## [46] scales_1.1.0 DBI_1.1.0 edgeR_3.28.0
## [49] rngtools_1.5 bibtex_0.4.2.2 miniUI_0.1.1.1
## [52] Rcpp_1.0.3 viridisLite_0.3.0 xtable_1.8-4
## [55] progress_1.2.2 htmlTable_1.13.3 foreign_0.8-72
## [58] bit_1.1-15.2 Formula_1.2-3 htmlwidgets_1.5.1
## [61] httr_1.4.1 acepack_1.4.1 pkgconfig_2.0.3
## [64] XML_3.99-0.3 farver_2.0.3 nnet_7.3-12
## [67] locfit_1.5-9.1 labeling_0.3 howmany_0.3-1
## [70] tidyselect_1.0.0 rlang_0.4.4.9000 manipulateWidget_0.10.0
## [73] softImpute_1.4 reshape2_1.4.3 later_1.0.0
## [76] AnnotationDbi_1.48.0 munsell_0.5.0 tools_3.6.2
## [79] RSQLite_2.2.0 ade4_1.7-13 evaluate_0.14
## [82] stringr_1.4.0 fastmap_1.0.1 yaml_2.2.1
## [85] knitr_1.28 bit64_0.9-7 purrr_0.3.3
## [88] nlme_3.1-142 mime_0.9 xml2_1.3.2
## [91] rstudioapi_0.11 compiler_3.6.2 png_0.1-7
## [94] tibble_2.1.3 RNeXML_2.4.2 stringi_1.4.5
## [97] RSpectra_0.16-0 lattice_0.20-38 vctrs_0.2.2
## [100] pillar_1.4.3 lifecycle_0.1.0 RcppAnnoy_0.0.14
## [103] zinbwave_1.11.6 data.table_1.12.8 bitops_1.0-6
## [106] httpuv_1.5.2 R6_2.4.1 latticeExtra_0.6-29
## [109] promises_1.1.0 gridExtra_2.3 codetools_0.2-16
## [112] MASS_7.3-51.4 assertthat_0.2.1 rhdf5_2.30.1
## [115] pkgmaker_0.31 withr_2.1.2 GenomeInfoDbData_1.2.2
## [118] locfdr_1.1-8 hms_0.5.3 grid_3.6.2
## [121] rpart_4.1-15 tidyr_1.0.2 rmarkdown_2.1
## [124] shiny_1.4.0 base64enc_0.1-3